** Session reset: close any open log, wipe memory, disable paging, fix the seed
capture log close
clear all
set more off, permanently
set seed 1234

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// Table O.17: (All Choices) Priority Subjects and Gender Performance Gap ///////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

** Load the Phase 2 dataset (long format) with normalized scores
use "Work Data/Gender_Phase2_long.dta", clear

*************** Adjust courses and check variables ***************

* List the priority / course / career variables that come with the data,
* then discard the existing prior_* flags (they are rebuilt per choice below)
describe prior* curso* career*
drop prior_*

* Course code 31 is recoded to 62 in both the second and third choice
foreach choice in curso2 curso3 {
    replace `choice'=62 if `choice'==31
}

*************** Priority disciplines - 2nd and 3rd Choice ***************

* Indicators for whether a second / third course choice was recorded
gen second_choice=(curso2!=.)
gen third_choice=(curso3!=.)

* For each non-first choice (k = 2, 3):
*   1. rename cursoK to careerK so it can serve as the merge key,
*   2. attach the priority-discipline flags of that course from the lookup file,
*   3. prefix the flags with second_ / third_,
*   4. tabulate the merge result BEFORE discarding lookup-only rows, so the
*      diagnostic is the same for both choices,
*   5. apply the year-specific corrections to the flags.
* Requires the user-written command renvarlab.
local k 1
foreach ord in second third {
    local ++k

    rename curso`k' career`k'
    merge m:1 career`k' using "Original data/priority_discipline.dta"
    renvarlab prior_port- prior_lang, prefix(`ord'_)
    tab _merge if career`k'~=.
    drop if _merge==2

    * Remove the other career-code columns (presumably brought in by the lookup file).
    * career3 must be gone after the first pass so that curso3 can take that name.
    * On the second pass career3 is the merge key and is kept.
    if `k'==2 {
        drop career3
    }
    drop career1 _merge

    * Correct for changes in priority disciplines over time:
    * Philosophy: in 2000 and 2001, Portuguese and Mathematics were priority disciplines. Starting in 2002, only Portuguese was a priority discipline.
    replace `ord'_prior_math=1 if inlist(year,2000,2001) & career`k'==30
    * Dentistry: in 2000 and 2001, it required Biology and Chemistry as priority disciplines. Starting in 2002, only Biology was required.
    replace `ord'_prior_chem=1 if inlist(year,2000,2001) & career`k'==14
    * Physical Education (codes 27 and 45): Until 2002, only Biology was priority discipline. Starting in 2003, Biology and History.
    foreach code in 27 45 {
        replace `ord'_prior_hist=0 if inlist(year,2000,2001,2002) & career`k'==`code'
    }
    * Arts: in 2000, only Portuguese was priority discipline
    replace `ord'_prior_hist=0 if year==2000 & career`k'==25
}

******** Generate priority dummies ********

* For the second and third choice in turn: start from 0 wherever that choice
* exists, then copy in the flag of the subject the row refers to
* (row with subject s takes the value of <ordinal>_prior_s).
* mdesc is a user-written command reporting missing values.
local k 1
foreach ord in second third {
    local ++k
    gen priority_`ord'=0 if career`k'~=.
    levelsof subject, local(levels)
    foreach s of local levels {
        replace priority_`ord'=`ord'_prior_`s' if subject=="`s'"
    }
    tab priority_`ord'
    tab priority priority_`ord', row
    mdesc priority_`ord'
}
label var priority_second "Priority 2\textsuperscript{nd} Choice"
label var priority_third "Priority 3\textsuperscript{rd} Choice"

************************************************************
************ SECOND/THIRD (NOT FIRST) CHOICE ***************
************************************************************

* 1 when the subject is not a priority for the first choice but is a priority
* for the second or the third choice; 0 otherwise
gen priority_other_notfirst=(priority==0 & (priority_second==1 | priority_third==1))
tab priority_other_notfirst
label var priority_other_notfirst "Priority Other Choice, Not First"

* Interaction with the female indicator
gen fem_priority_2_3rd_not1st=female*priority_other_notfirst
label var fem_priority_2_3rd_not1st "Female $\times$ Priority Other Choice, Not First"

************* Creating other control variables

* Numeric subject identifier plus one dummy per subject (d_sub1 ... d_sub8)
encode subject, gen(sub)
tab subject, gen(d_sub)
label var sub "Subject"

** Subject dummies
* tab, gen() numbers the dummies by the sorted values of subject. The mapping
* below assumes the eight subject codes sort as Biology, Chemistry, Geography,
* History, Language, Mathematics, Physics, Portuguese - TODO confirm against the data.
* Each dummy is renamed and labelled under its FULL name, so nothing here depends
* on variable-name abbreviation (the previous label command used the short name Math).
local k 0
foreach name in Biology Chemistry Geography History Language Mathematics Physics Portuguese {
    local ++k
    rename d_sub`k' `name'
    label var `name' "`name'"
}
* Only the foreign-language dummy gets a label different from its name
label var Language "Foreign Language"

** Interaction: priority X female
gen fem_priority=female*priority
label var fem_priority "Female $\times$ Priority"
* Per subject: female x subject, priority x subject, female x priority x subject
foreach v of varlist Biology-Portuguese {
    gen fem_`v'=`v'*female
    label var fem_`v' "Female $\times$ `v'"
    gen prio_`v'=priority*`v'
    label var prio_`v' "Priority $\times$ `v'"
    gen fem_prio_`v'=fem_priority*`v'
    label var fem_prio_`v' "Female $\times$ Priority $\times$ `v'"
}

* Regressor lists used by the regressions: subject dummies and their female
* interactions. Biology is left out of both lists (omitted category).
global subject "Chemistry Geography History Mathematics Physics"
global subject_fem "fem_Chemistry fem_Geography fem_History fem_Mathematics fem_Physics"

** P1 scores: P1 normalized subject-specific scores
* Powers 2 to 4 of the normalized Phase 1 score
forvalues i=2(1)4 {
    gen norm_p1score`i'=norm_p1score^`i'
    sum norm_p1score`i'
}
global pol_norm_p1score norm_p1score*
d $pol_norm_p1score

*********************************************************************************
****************   Relative performances ****************************************
*********************************************************************************

** ENEM

* ENEM score relative to the mean of the same year and gender
bysort year female: egen norm_enem_w_ave_g=mean(norm_enem_w)
gen norm_enem_w_g=norm_enem_w-norm_enem_w_ave_g
bysort year female: sum norm_enem_w_g
drop norm_enem_w_ave_g

* Interaction: subject X ENEM
* For each subject dummy: ENEM x subject and female x ENEM x subject,
* followed by powers 2 to 4 of each interaction
foreach subj of varlist Biology-Portuguese {
    gen enem_`subj'=`subj'*norm_enem_w_g
    label var enem_`subj' "ENEM $\times$ `subj'"
    gen fem_enem_`subj'=female*norm_enem_w_g*`subj'
    label var fem_enem_`subj' "Female $\times$ ENEM $\times$ `subj'"
    forvalues p=2/4 {
        gen enem_`subj'_`p'=enem_`subj'^`p'
        gen fem_enem_`subj'_`p'=fem_enem_`subj'^`p'
    }
    sum enem_`subj'* fem_enem_`subj'*
}

* ENEM x subject polynomial terms (Biology left out of the list)
global g_pol_enem_sub "enem_Chemistry* enem_Geography* enem_History* enem_Mathematics* enem_Physics*"
d $g_pol_enem_sub

**** Priority x relative performance in ENEM:

* First choice
* Powers 1 to 4 of relative ENEM, each multiplied by the first-choice priority dummy
gen norm_enem_w_priority_g=norm_enem_w_g*priority
forvalues p=2/4 {
    gen norm_enem_w_priority_g`p'=norm_enem_w_g^`p'*priority
    sum norm_enem_w_priority_g`p'
}

global g_norm_enem_w_prio norm_enem_w_priority_g*
d $g_norm_enem_w_prio

* Second/Third (not First) choice
* Same construction with the second/third-not-first priority dummy
gen norm_enem_w_prio2_3rd_not1st_g=norm_enem_w_g*priority_other_notfirst
forvalues p=2/4 {
    gen norm_enem_w_prio2_3rd_not1st_g`p'=norm_enem_w_g^`p'*priority_other_notfirst
    sum norm_enem_w_prio2_3rd_not1st_g`p'
}

global g_norm_enem_w_prio2_3rd_not1st norm_enem_w_prio2_3rd_not1st_g*
d $g_norm_enem_w_prio2_3rd_not1st

** Phase 1 scores

* Phase 1 score relative to the mean of the same year, subject and gender
bysort year subject female: egen gs_norm_p1score_ave=mean(norm_p1score)
gen gs_norm_p1score=norm_p1score-gs_norm_p1score_ave
bysort year female subject: sum gs_norm_p1score
drop gs_norm_p1score_ave

* Powers 2 to 4 of the relative Phase 1 score
forvalues p=2/4 {
    gen gs_norm_p1score`p'=gs_norm_p1score^`p'
    sum gs_norm_p1score`p'
}

global gs_pol_norm_p1score gs_norm_p1score gs_norm_p1score2 gs_norm_p1score3 gs_norm_p1score4
d $gs_pol_norm_p1score

* Priority x Phase 1 scores:
gen gs_norm_p1score_prio=gs_norm_p1score*priority
forvalues p=2/4 {
    gen gs_norm_p1score_prio`p'=gs_norm_p1score`p'*priority
    sum gs_norm_p1score_prio*
}

* Priority second/third (not first) x Phase 1 scores:
gen gs_norm_p1score_prio_not1st=gs_norm_p1score*priority_other_notfirst
forvalues p=2/4 {
    gen gs_norm_p1score_prio_not1st`p'=gs_norm_p1score`p'*priority_other_notfirst
    sum gs_norm_p1score_prio_not1st*
}

* The first-choice list is spelled out because a trailing wildcard on
* gs_norm_p1score_prio would also match the _prio_not1st variables
global gs_pol_norm_p1score_prio gs_norm_p1score_prio gs_norm_p1score_prio2 gs_norm_p1score_prio3 gs_norm_p1score_prio4
d $gs_pol_norm_p1score_prio
global gs_pol_norm_p1score_prio_not1st gs_norm_p1score_prio_not1st*
d $gs_pol_norm_p1score_prio_not1st

*********************************************************************************
**************** Main sample ****************************************************
*********************************************************************************

* 1) Only years before the affirmative action took place
* (rows flagged aa_year==1 and the year 2000 are removed)
drop if aa_year==1
drop if year==2000
tabulate year

* 2) Drop Portuguese and Foreign Language (in Phase 1 there is no Portuguese or Foreign Language exams - For Portuguese Phase 1 has an essay)
tabulate subject, summarize(norm_p1score)
drop if inlist(subject,"lang","port")
tabulate subject, summarize(norm_p1score)

* Remove every variable whose name contains Language or Portuguese, except
* priority_Portuguese, which is parked under a temporary name during the drop.
* NOTE(review): priority_Portuguese is not created in the visible part of this
* file; presumably it comes with the loaded dataset - confirm.
rename priority_Portuguese priopor
drop *Language* *Portuguese*
rename priopor priority_Portuguese
 
*********************************************************************************
****************   Regressions **************************************************
*********************************************************************************

** Labeling variables
label var priority "Priority"
label var female "Female"
label var norm_enem_w_g "ENEM"

estimates clear

* Regressors shared by every specification
local core priority fem_priority priority_other_notfirst fem_priority_2_3rd_not1st

* Control sets, built cumulatively: each column adds to the previous one
local x3 $subject $subject_fem
local x4 `x3' $g_pol_enem_sub
local x5 `x4' $g_norm_enem_w_prio $g_norm_enem_w_prio2_3rd_not1st
local x6 `x5' $gs_pol_norm_p1score
local x7 `x6' $gs_pol_norm_p1score_prio $gs_pol_norm_p1score_prio_not1st

* Columns 1-2: OLS with female and relative ENEM entered directly,
* standard errors clustered on inscri2
regress norm_score female `core' norm_enem_w_g, cluster(inscri2)
estimates store reg1
regress norm_score female `core' norm_enem_w_g `x3', cluster(inscri2)
estimates store reg2

* Columns 3-7: inscri2 absorbed as a fixed effect (reghdfe is user-written),
* same clustering; female and the ENEM level are no longer listed
forvalues col=3/7 {
    reghdfe norm_score `core' `x`col'', cluster(inscri2) absorb(inscri2)
    estimates store reg`col'
}

* Attach Yes/No indicator rows to the stored estimates (estadd is part of the
* user-written estout package). Each pair of lines sets one footer row of the
* table: "No" for the listed models, "Yes" for the remaining ones.

* Subject dummies included
estadd local sub_fe "No":  reg1
estadd local sub_fe "Yes": reg2 reg3 reg4 reg5 reg6 reg7

* Female x subject dummies included
estadd local subgender_fe "No":  reg1
estadd local subgender_fe "Yes": reg2 reg3 reg4 reg5 reg6 reg7

* inscri2 absorbed as a fixed effect
estadd local ind_fe "No": reg1 reg2 
estadd local ind_fe "Yes": reg3 reg4 reg5 reg6 reg7

* ENEM x subject polynomial terms included
estadd local enemsub "No":  reg1 reg2 reg3 
estadd local enemsub "Yes": reg4 reg5 reg6 reg7

* ENEM x priority polynomial terms included
estadd local enemprio_pol4 "No":  reg1 reg2 reg3 reg4 
estadd local enemprio_pol4 "Yes": reg5 reg6 reg7

* Phase 1 score polynomial included
estadd local p1score_pol4 "No": reg1 reg2 reg3 reg4 reg5
estadd local p1score_pol4 "Yes": reg6 reg7

* Phase 1 score x priority polynomial terms included
estadd local p1scoreprio_pol4 "No": reg1 reg2 reg3 reg4 reg5 reg6
estadd local p1scoreprio_pol4 "Yes": reg7 

* Tex
* Writes the seven columns to a LaTeX fragment (booktabs, replace, f), showing
* only the coefficients named in keep() with standard errors and 10/5/1 percent stars.
* The multicolumn header in refcat() spans 8 columns: 1 label column + 7 models.
* NOTE(review): "sep" is not set by any estadd above; paired with the blank
* label " " it presumably only produces an empty spacer row - confirm.
* NOTE(review): fmt() lists 12 formats while stats() lists 11 statistics.
esttab reg1 reg2 reg3 reg4 reg5 reg6 reg7 using "Output/p_Gender_Score_Second_Third_NotFirst.tex", se star(* 0.10 ** 0.05 *** 0.01) nogap ///
stats(r2_a N N_clust sep sub_fe subgender_fe ind_fe enemsub  enemprio_pol4 p1score_pol4 p1scoreprio_pol4 , fmt(%9.3fc %9.0fc %9.0fc %1s %3s %3s %3s %3s %3s %3s %3s %3s) /// 
labels("$\bar{R}^2$" "Number of observations"  "Number of applicants" " " "Subject FE" "Subject-gender FE" "Individual FE" "ENEM $\times$ Subject FE" "ENEM $\times$ Priority" "Phase 1 scores"  "Phase 1 scores $\times$ Priority"  )) b(%7.3f) se(%7.3f)  booktabs replace f label nomtitle collabels(none) keep(female priority fem_priority priority_other_notfirst fem_priority_2_3rd_not1st norm_enem_w_g) refcat(female " \\ \multicolumn{8}{l}{\textit{Dependent variable: Phase 2 normalized subject-specific scores}} \\", nolabel)
